library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggstatsplot)
## You can cite this package as:
## Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
## Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(haven)
library(phyloseq)
library(ggsci)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-2
library(uwot)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(ggforce)
library(glue)
library(easystats)
## # Attaching packages: easystats 0.5.2.8 (red = needs update)
## ✔ bayestestR 0.13.0 ✖ correlation 0.8.2
## ✖ datawizard 0.6.0 ✖ effectsize 0.7.0.5
## ✖ insight 0.18.4 ✔ modelbased 0.8.5
## ✖ performance 0.9.2 ✖ parameters 0.18.2
## ✔ report 0.5.5.1 ✔ see 0.7.3
##
## Restart the R-Session and update packages in red with `easystats::easystats_update()`.
library(ggrepel)
library(gamlss)
## Loading required package: splines
## Loading required package: gamlss.data
##
## Attaching package: 'gamlss.data'
##
## The following object is masked from 'package:datasets':
##
## sleep
##
## Loading required package: gamlss.dist
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: nlme
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
## Loading required package: parallel
## ********** GAMLSS Version 5.4-3 **********
## For more on GAMLSS look at https://www.gamlss.com/
## Type gamlssNews() to see new features/changes/bug fixes.
library(emmeans)
source("/storage/megalodon/R/import_kraken.R")
source("/storage/megalodon/R/kraken_heat_tree.R")
setwd("/storage/mariaInsenser/")
files <- dir("./reports/", pattern = "report", full.names = T)
reports <- import_kraken(files,20)
## Loading required package: doParallel
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
metadata <- read_tsv("LastMetadata.tsv")
## Registered S3 method overwritten by 'bit':
## method from
## print.ri gamlss
## Rows: 234 Columns: 94
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (7): Sample, Position, Run, Code_Sample, Description, GRUPO, Madre
## dbl (87): NumReads, Code_Microbiota, Studio, Orden, Heces, Grupos, Grupo3, D...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metadata <- metadata %>% mutate(Description = ifelse(grepl("solo",Description),"Destete",Description))
design <- read_tsv("DesignStudy")
## Rows: 12 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): Group, Sex, Diet, Treatment
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
table <- reports %>%
distinct() %>%
inner_join(metadata)
## Joining, by = "Sample"
Distribucion de reads en cada grupo
table %>% mutate(Studio = as.factor(Studio)) %>%
filter(Name =="root") %>%
ggplot(aes(x=GRUPO, y = Reads, fill = Studio)) +
geom_boxplot() + facet_wrap(Studio ~ ., scale = "free_x")
Numero de reasd classificados y no clasificados
table %>%
filter(depth ==1) %>%
mutate(Name2 = gsub("root","classified",Name2)) %>%
distinct() %>%
ggplot(aes(x=Sample, y = Prop, fill = Name2)) +
geom_col() +
coord_flip() + scale_fill_d3()
1. A nivel madres gestantes (Un Ăºnico factor: Tratamiento = tres grupos)
â—¦ Control (sin inyecciĂ³n de testosterona) (n=4)
â—¦ 5 mgr (inyecciĂ³n de 5 mgr testosterona) (n=5)
â—¦ 20 mgr (inyecciĂ³n de 20 mgr testosterona) (n=5)
Existe diferencia en la microbiota de las madres teniendo en cuenta el factor tratamiento? alpha y beta diversidad. • Hay discriminaciĂ³n entre grupos (la androgenizaciĂ³n influye en la microbiota)? • DĂ³nde estĂ¡ la diferencia? Control vs 5mg vs 20mg vs Control.
Creamos la OTU-table para resolver la pregunta.
samplingSize <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(Studio ==2) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table_q1 <- table %>%
filter(Studio ==2) %>%
filter(grepl("G",TaxRank)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>%
t() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
t()
Calculamos la alpha diversity
simpson <- diversity(otu_table_q1, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table_q1, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(metadata)
## Joining, by = "Sample"
## Joining, by = "Sample"
div %>%
ggplot(aes(x = NumReads, y = DiversityValue)) +
geom_point() +
facet_wrap(.~DiversityIndex, scales = "free")
div %>%
filter(DiversityIndex == "Shannon") %>%
pull(DiversityValue) %>%
shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.93032, p-value = 0.3083
div %>%
filter(DiversityIndex == "Simpson") %>%
pull(DiversityValue) %>%
shapiro.test()
##
## Shapiro-Wilk normality test
##
## data: .
## W = 0.94282, p-value = 0.4557
div %>%
filter(DiversityIndex =="Shannon") %>%
ggbetweenstats(x = GRUPO, y = DiversityValue, type = "robust")
div %>%
filter(DiversityIndex =="Simpson") %>%
ggbetweenstats(x = GRUPO, y = DiversityValue, type = "robust")
No parece que haya diferencias entre la alpha diverisdad de los grupos.
Vamos a calcular la Beta-diversidad.
otu_table_q1 <- table %>%
filter(Studio ==2) %>%
filter(grepl("G",TaxRank)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>% t()
dist_q1 <- avgdist(otu_table_q1,sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(metadata)
## Joining, by = "Sample"
pheatmap::pheatmap(dist_q1 %>%
dplyr::select(Sample:H3_S24.run1.report) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist_q1 %>%
dplyr::select(Sample,GRUPO) %>%
column_to_rownames("Sample"))
dist_ad <-dist_q1 %>%
dplyr::select(Sample:H3_S24.run1.report) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ GRUPO, dist_q1 %>%
dplyr::select(Sample,GRUPO) %>%
column_to_rownames("Sample"))
Podemos concluir que a nivel metagenomico no existen diferencias ni en alpha diversidad ni en beta diversidad entre los grupos de madres.
• QuĂ© taxas son las responsables de las diferencias (la androgenizaciĂ³n afecta a determinadas taxas)? CuĂ¡les discriminan mĂ¡s? • Es dosis dependiente? Ej: Control < 5mg < 20mg o al revĂ©s para determinados phylum o genera.
samplingSize <- table %>%
filter(Studio ==2) %>%
filter(grepl("G",TaxRank)) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table_q1 <- table %>%
filter(Studio ==2) %>%
filter(grepl("G",TaxRank)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = NCBI,
values_from = Reads,
values_fill =0) %>%
as.data.frame() %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
mutate(GRUPO = as.factor(GRUPO))
## Joining, by = "Sample"
gamlss_q1 <- otu_table_q1 %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,GRUPO,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ GRUPO,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>%
mutate(test = list(emmeans(model, specs = "GRUPO", data = data) %>%
pairs() %>%
as.data.frame()))
gamlss_q1 %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue)
## Joining, by = "NCBI"
gamlss_q1 %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue) %>%
separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(final_pvalue < 0.05,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(final_pvalue), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(A ~ B)
## Joining, by = "NCBI"
## Warning: Removed 2024 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 15 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
A nivel ratas en el destete (Dos factores: Tratamiento y Sexo)
â—¦ Control (51: 32 machos, 19 hembras)
â—¦ 5 mgr (55: 37 machos, 18 hembras)
â—¦ 20 mgr (12: 7 machos, 5 hembras)
Vamos a preparar la tabla
samplingSize <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Destete",Description)) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table_q2 <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Destete",Description)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>% t() %>% rrarefy(sample = samplingSize$TotalReads) %>% t()
Preparamos el metadata con las variables de tratamiento (Control , 5mg y 20mg) y sexo
meta_q2 <- metadata %>%
filter(grepl("Destete",Description)) %>%
mutate(Tratamiento = ifelse(Description == "Destete 20mg","20mg",Tratamiento)) %>%
mutate(Tratamiento = ifelse(Tratamiento == "0","Contro",Tratamiento)) %>%
mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>%
dplyr::select(Sample,Tratamiento,Sexo,NumReads)
Existe diferencia en la microbiota de la progenie teniendo en cuenta el factor tratamiento? Y teniendo en cuenta el sexo? alpha y beta diversidad. ¿QuĂ© grupos se parecen mĂ¡s? (Eso se puede ver representando en la matriz de distancias los diferentes grupos y ver cuĂ¡les estĂ¡n mĂ¡s o menos cerca?). DĂ³nde estĂ¡n las diferencias (significaciĂ³n estadĂstica) y quĂ© taxas son las que discriminan entre los grupos?
Calculamos la alpha diversidad.
simpson <- diversity(otu_table_q2, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table_q2, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(meta_q2)
## Joining, by = "Sample"
## Joining, by = "Sample"
div %>%
ggplot(aes(x = NumReads, y = DiversityValue)) +
geom_point() +
facet_wrap(.~DiversityIndex, scales = "free")
div %>% filter(DiversityIndex =="Shannon") %>% ggbetweenstats(x = Tratamiento, y = DiversityValue, type = "robust")
div %>% filter(DiversityIndex =="Simpson") %>% ggbetweenstats(x = Tratamiento, y = DiversityValue, type = "robust")
Segun los test estadisticos podemos concluir que si existen diferencias en alpha diversidad entre los grupos.
Vamos a calcular la Beta-diversidad.
otu_table_q2 <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Destete",Description)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>% t()
dist_q2 <- avgdist(otu_table_q2,sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(meta_q2)
## Joining, by = "Sample"
pheatmap::pheatmap(dist_q2 %>% dplyr::select(Sample,ends_with("report")) %>% column_to_rownames("Sample") %>% as.matrix() %>% as.dist(),
annotation_row = dist_q2 %>%
dplyr::select(Sample,Tratamiento,Sexo) %>%
column_to_rownames("Sample"))
dist_ad <-dist_q2 %>% dplyr::select(Sample, ends_with("report")) %>% column_to_rownames("Sample") %>% as.matrix() %>% as.dist()
adonis2(dist_ad ~ Tratamiento*Sexo, dist_q2 %>%
dplyr::select(Sample,Tratamiento,Sexo) %>%
column_to_rownames("Sample"), permutations = 1000)
Parece que existe una diferencia composicional entre los grupos de tratamiento pero no entre el sexo.
samplingSize <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Destete",Description)) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table_q2 <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Destete",Description)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = NCBI,
values_from = Reads,
values_fill =0) %>%
as.data.frame() %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
mutate(Tratamiento = ifelse(Tratamiento == "0","Control",Tratamiento)) %>%
mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>%
mutate(Tratamiento = ifelse(is.na(Tratamiento),"20mg",Tratamiento)) %>%
mutate(Sexo = fct_recode(as.factor(Sexo),"Hembra" ="0","Macho" ="1"))
## Joining, by = "Sample"
gamlss_q2 <- otu_table_q2 %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Tratamiento,Sexo,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Tratamiento + Sexo,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F)))) %>% mutate(test = list(emmeans(model, specs = "Tratamiento", data = data) %>% pairs() %>% as.data.frame()))
gamlss_q2 %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue)
## Joining, by = "NCBI"
gamlss_q2 %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue) %>%
separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(final_pvalue < 0.01,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(final_pvalue), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(A ~ B)
## Joining, by = "NCBI"
## Warning: Removed 2026 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
A nivel ratas adultas [Factores: Tratamiento, Dieta, y Grupo (machos, hembras y machos castrados)]
Vamos a preparar la tabla
samplingSize <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Adulta",Description)) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table_q3 <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Adulta",Description)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>% t() %>% rrarefy(sample = samplingSize$TotalReads) %>% t()
Preparamos el metadata con las variables de tratamiento (Control , 5mg y 20mg) y sexo
meta_q3 <- metadata %>%
filter(grepl("Adulta",Description)) %>%
mutate(Tratamiento = ifelse(Tratamiento == "0","Control",Tratamiento)) %>%
mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>%
mutate(Dieta = ifelse(Dieta ==0,"Dieta_No","Dieta_Si")) %>%
mutate(Grupo = fct_recode(as.factor(Sexo + Castrado),"Hembra" ="0","Macho" ="1", "Macho_Castrado" ="2" )) %>%
dplyr::select(Sample,Tratamiento,Sexo,Dieta,Castrado,NumReads,Grupo,Madre,NºJaula) %>% mutate_if(is.character, as.factor)
Calculamos la alpha diversidad.
simpson <- diversity(otu_table_q3, MARGIN = 2, index = "simpson") %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Simpson =2)
shannon <- diversity(otu_table_q3, MARGIN = 2,index = "shannon" ) %>%
as.data.frame() %>%
rownames_to_column("Sample") %>%
rename(Shannon = 2)
div <- inner_join(simpson, shannon) %>%
pivot_longer(names_to = "DiversityIndex",values_to = "DiversityValue", -Sample) %>%
inner_join(meta_q3) %>%
mutate_if(is.character, as.factor) %>%
mutate(Dieta = fct_recode(as.factor(Dieta),"Dieta Si" = "1", "Dieta No" ="0"))
## Joining, by = "Sample"
## Joining, by = "Sample"
## Warning: Unknown levels in `f`: 1, 0
div %>%
ggplot(aes(x = NumReads, y = DiversityValue)) +
geom_point() +
facet_wrap(.~DiversityIndex, scales = "free")
div %>% ggplot(aes(x=Grupo, y = DiversityValue, fill = Tratamiento)) + geom_boxplot() + facet_grid(DiversityIndex ~ Dieta, scales = "free_y")
div %>% filter(DiversityIndex =="Shannon") %>% ggbetweenstats(x = Tratamiento, y = DiversityValue, type = "bayes",title = "Shannon Diversity in Treatment")
div %>% filter(DiversityIndex =="Simpson") %>% ggbetweenstats(x = Tratamiento, y = DiversityValue, type = "bayes",title = "Simpson Diversity in Treatment")
div %>% filter(DiversityIndex =="Shannon") %>% ggbetweenstats(x = Grupo, y = DiversityValue, type = "bayes",title = "Shannon Diversity in Group")
div %>% filter(DiversityIndex =="Simpson") %>% ggbetweenstats(x = Grupo, y = DiversityValue, type = "bayes",title = "Simpson Diversity in Group")
div %>% filter(DiversityIndex =="Shannon") %>% ggbetweenstats(x = Dieta, y = DiversityValue, type = "bayes",title = "Shannon Diversity in Dieta")
div %>% filter(DiversityIndex =="Simpson") %>% ggbetweenstats(x = Dieta, y = DiversityValue, type = "bayes",title = "Simpson Diversity in Dieta")
Vamos a tratar de realizar un ANOVA o Kruskal-Wallis de las alpha diversity.
input_table <- div %>%
dplyr::select(Sample,DiversityIndex,DiversityValue,Tratamiento,Grupo,Dieta) %>%
pivot_wider(names_from = DiversityIndex, values_from = DiversityValue) %>% mutate_if(is.character, as.factor)
Modelos ANOVA sobre el indice Shannon
input_table %>% rstatix::anova_test(Shannon ~ Dieta * Tratamiento * Grupo)
## Coefficient covariances computed by hccm()
Lo resultados parecen descartar cualquier interacciĂ³n, por lo que realizamos el anĂ¡lisis pos-hoc sin interacciones.
input_table %>% rstatix::tukey_hsd(Shannon ~ Grupo + Dieta + Tratamiento)
Modelos GLM sobre el indice Simpson (Simpson no es parametrico)
mod <- gamlss(Simpson ~ Dieta + Tratamiento + Grupo, data = input_table, family ="GA")
## GAMLSS-RS iteration 1: Global Deviance = -150.8023
## GAMLSS-RS iteration 2: Global Deviance = -150.8023
summary(mod)
## ******************************************************************
## Family: c("GA", "Gamma")
##
## Call: gamlss(formula = Simpson ~ Dieta + Tratamiento + Grupo,
## family = "GA", data = input_table)
##
## Fitting method: RS()
##
## ------------------------------------------------------------------
## Mu link function: log
## Mu Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.281534 0.032431 -8.681 8.76e-14 ***
## DietaDieta_Si 0.069622 0.029833 2.334 0.0217 *
## TratamientoControl -0.041804 0.029858 -1.400 0.1646
## GrupoMacho -0.002112 0.036282 -0.058 0.9537
## GrupoMacho_Castrado 0.046875 0.036285 1.292 0.1994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------------------------------
## Sigma link function: log
## Sigma Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.88579 0.06907 -27.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------------------------------
## No. of observations in the fit: 104
## Degrees of Freedom for the fit: 6
## Residual Deg. of Freedom: 98
## at cycle: 2
##
## Global Deviance: -150.8023
## AIC: -138.8023
## SBC: -122.936
## ******************************************************************
estimate_contrasts(mod, contrast = c("Dieta","Tratamiento","Grupo"))
Vamos a calcular la Beta-diversidad.
samplingSize <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Adulta",Description)) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table_q3 <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Adulta",Description)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = Sample,
values_from = Reads,
values_fill =0) %>%
column_to_rownames("NCBI") %>%
as.matrix() %>% t()
dist_q3 <- avgdist(otu_table_q3,sample = samplingSize$TotalReads, dmethod = "bray") %>%
as.matrix() %>%
as_tibble(rownames = "Sample") %>%
inner_join(meta_q3)
## Joining, by = "Sample"
pheatmap::pheatmap(dist_q3 %>%
dplyr::select(Sample,ends_with("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist(),
annotation_row = dist_q3 %>%
dplyr::select(Sample,Tratamiento,Grupo,Dieta, Madre, NºJaula) %>%
mutate(NºJaula = as.factor(NºJaula)) %>%
column_to_rownames("Sample"))
dist_ad <-dist_q3 %>%
dplyr::select(Sample, ends_with("report")) %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
as.dist()
adonis2(dist_ad ~ Tratamiento*Grupo*Dieta*Madre*NºJaula , dist_q3 %>%
dplyr::select(Sample,Tratamiento,Grupo,Dieta,Madre,NºJaula) %>%
mutate(NºJaula = as.factor(NºJaula)) %>%
column_to_rownames("Sample"), permutations = 1000)
Como parece que no existen interacciones repetimos el analisis sin interacciones
adonis2(dist_ad ~ Tratamiento+Grupo+Dieta+Madre+NºJaula , dist_q3 %>%
dplyr::select(Sample,Tratamiento,Grupo,Dieta,Madre,NºJaula) %>%
mutate(NºJaula = as.factor(NºJaula)) %>%
column_to_rownames("Sample"), permutations = 1000)
samplingSize <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Adulta",Description)) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table_q3 <- table %>%
filter(grepl("G",TaxRank)) %>%
filter(grepl("Adulta",Description)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = NCBI,
values_from = Reads,
values_fill =0) %>%
as.data.frame() %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
mutate(Dieta = ifelse(Dieta == 1, "Dieta_Si","Dieta_No")) %>%
mutate(Tratamiento = ifelse(Tratamiento == "0","Control",Tratamiento)) %>%
mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>%
mutate(Grupo = fct_recode(as.factor(Sexo + Castrado),"Hembra" ="0","Macho" ="1", "Macho_Castrado" ="2" ))
## Joining, by = "Sample"
gamlss_q3 <- otu_table_q3 %>%
group_by(Sample) %>%
mutate(TotalReads = sum(Reads), NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Dieta,Grupo,Tratamiento,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Dieta + Grupo+ Tratamiento,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F))))
gamlss_q3 %>%
mutate(test = list(emmeans(model, specs = "Dieta", data = data) %>%
pairs() %>%
as.data.frame())) %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,Name = Name2,contrast,z.ratio,final_pvalue) %>%
separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(final_pvalue < 0.001,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(final_pvalue), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(A ~ B)
## Joining, by = "NCBI"
## Warning: Removed 614 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 58 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Test a todos los rangos
samplingSize <- table %>%
filter(TaxRank == "R") %>%
filter(grepl("Adulta",Description)) %>%
group_by(Sample) %>%
summarise(TotalReads = sum(Reads)) %>%
slice_min(TotalReads)
otu_table_q3 <- table %>%
filter(grepl("Adulta",Description)) %>%
dplyr::select(Sample,Reads,NCBI) %>%
pivot_wider(names_from = NCBI,
values_from = Reads,
values_fill =0) %>%
as.data.frame() %>%
column_to_rownames("Sample") %>%
as.matrix() %>%
rrarefy(sample = samplingSize$TotalReads) %>%
as_tibble(rownames = "Sample") %>%
pivot_longer(names_to = "NCBI", values_to = "Reads",-Sample) %>%
inner_join(metadata) %>%
mutate(Dieta = ifelse(Dieta == 1, "Dieta_Si","Dieta_No")) %>%
mutate(Tratamiento = ifelse(Tratamiento == "0","Control",Tratamiento)) %>%
mutate(Tratamiento = ifelse(Tratamiento == "1","5mg",Tratamiento)) %>%
mutate(Grupo = fct_recode(as.factor(Sexo + Castrado),"Hembra" ="0","Macho" ="1", "Macho_Castrado" ="2" ))
## Joining, by = "Sample"
samplingSize <- table %>%
filter(TaxRank == "R") %>%
filter(grepl("Adulta",Description)) %>%
rename(TotalReads = Reads) %>%
group_by(Sample) %>%
slice_max(TotalReads) %>%
dplyr::select(Sample, TotalReads)
gamlss_q3 <- otu_table_q3 %>%
inner_join(samplingSize) %>%
group_by(Sample) %>%
mutate(NSpecies = sum(Reads>0)) %>%
ungroup() %>%
group_by(NCBI) %>%
mutate(Nzeros = sum(Reads ==0)) %>%
ungroup() %>%
mutate(NSamples = n_distinct(Sample)) %>%
mutate(Freq = (Reads+1)/TotalReads) %>%
mutate(ZeroFreq = Nzeros/NSamples) %>%
filter(ZeroFreq < 0.9) %>%
dplyr::select(NCBI,Reads,Freq,ZeroFreq,Dieta,Grupo,Tratamiento,Sample) %>%
mutate(Reads = Reads+1) %>%
nest_by(NCBI) %>%
mutate(model = list(gamlss(formula = Freq ~ Dieta + Grupo+ Tratamiento,
data = data,
family = "BE",
control = gamlss.control(n.cyc = 200, trace = F))))
## Joining, by = "Sample"
gamlss_q3 %>%
mutate(test = list(emmeans(model, specs = "Dieta", data = data) %>%
pairs() %>%
as.data.frame())) %>%
dplyr::select(NCBI,test) %>%
unnest(test) %>%
mutate(final_pvalue = p.adjust(p.value,method = "BH")) %>%
inner_join(table %>%
dplyr::select(NCBI,Name2,TaxRank) %>%
mutate(NCBI = as.character(NCBI)) %>%
distinct()) %>%
dplyr::select(NCBI,TaxRank,Name = Name2,contrast,z.ratio,final_pvalue) %>%
separate(contrast, c("A","B"), sep = " - ") %>%
mutate(sig = ifelse(final_pvalue < 0.001,"Significative","No-Significative")) %>%
mutate(label = ifelse(sig == "Significative",Name,NA)) %>%
ggplot(aes(x= z.ratio, y = -log10(final_pvalue), color = sig, label = label)) +
geom_point(alpha = 0.5) +
geom_text_repel(size = 4)+
scale_color_d3() +
theme_light() + facet_wrap(A ~ B)
## Joining, by = "NCBI"
## Warning: Removed 615 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 75 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Cuestion 1: Machos vs Hembras (Dieta NO, Tratamiento Control, Studio 3, Castrados no) formula: ~ Sexo
Cuestion 2: Machos vs Hembras (Dieta SI/NO, Tratamiento Control, Studio 3, Castrados no) formula: ~Sexo * Dieta
Cuestion 3.1: Machos vs Machos Castrados (Dieta NO, Tratamiento Control, Studio 3, Hembras NO) formula: ~Castrado
Cuestion 3.2: Hembras Tratamiento control vs Hembras Tratamiento (Dieta NO, Studio3, Machos NO) forumula: ~Tratamiento
Cuestion 4.1: Machos vs Machos Castrados (Dieta SI/NO, Tratamiento Control, Studio 3, Hembras NO) formula: ~Castrado * Dieta
Cuestion 4.2: Hembras Tratamiento control vs Hembras Tratamiento (Dieta SI/NO, Studio3, Machos NO) forumula: ~Tratamiento * Dieta
Cuestion 5.1: Crear nueva variable. Disfuncion gonadal (SI <- Machos Castrados, Hembras Tratamiento Si, El resto NO) Sampling: Hembras Dieta NO, Tratameinto SI/NO Machos Dieta NO, Tratamiento NO Machos Castrados, Dieta NO, Tratamiento NO formula = ~ Sexo * Disfuncion Gonadal
Cuestion 5.2 Crear nueva variable. Disfuncion gonadal (SI <- Machos Castrados, Hembras Tratamiento Si, El resto NO) Sampling: Hembras Dieta SI/NO, Tratameinto SI/NO Machos Dieta SI/NO, Tratamiento NO Machos Castrados, Dieta SI/NO, Tratamiento NO formula = ~ Sexo * Disfuncion Gonadal * Dieta